{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "isc = pd.read_csv(\"information_schema.columns.csv\")\n",
    "isc\n",
    "# for SQL prepared\n",
    "# https://data.stackexchange.com/stackoverflow/query/1023678\n",
    "# isc = pd.read_csv(\"SEDE_tabsColsTypes.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "isc.TABLE_SCHEMA.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "isc.DATA_TYPE.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "isc.CHARACTER_MAXIMUM_LENGTH.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# find the maximum character length specified\n",
    "max_len = isc.CHARACTER_MAXIMUM_LENGTH.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set the -1 values to the maximum character length specified\n",
    "isc.loc[isc.CHARACTER_MAXIMUM_LENGTH == -1, 'CHARACTER_MAXIMUM_LENGTH'] = max_len\n",
    "isc.loc[isc.IS_NULLABLE == 'NO', 'is_null'] = 'NOT NULL'\n",
    "isc.is_null.fillna('NULL', inplace=True)\n",
    "isc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def len_prec(row):\n",
    "    if 'varchar' in row.DATA_TYPE: \n",
    "        val = row.DATA_TYPE + \"(\" + str(int(row.CHARACTER_MAXIMUM_LENGTH)) + \")\"\n",
    "    elif 'int' in row.DATA_TYPE:\n",
    "        val = row.DATA_TYPE + \"(\" + str(int(row.NUMERIC_PRECISION)) + \")\"\n",
    "    else:\n",
    "        val = row.DATA_TYPE\n",
    "    return val\n",
    "\n",
    "isc['datatype_lenprec'] = isc.transform(len_prec, axis=1)\n",
    "isct = isc.loc[isc.TABLE_NAME == 'Users', ['TABLE_NAME', 'ORDINAL_POSITION', 'COLUMN_NAME',\n",
    "       'DATA_TYPE', 'CHARACTER_MAXIMUM_LENGTH', 'datatype_lenprec']]\n",
    "isct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# list of all the tables\n",
    "ist = isc.groupby('TABLE_NAME').min().reset_index()['TABLE_NAME'].to_frame()\n",
    "ist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_attrib(grp):\n",
    "    return \"\\t\".join(grp.COLUMN_NAME + \" \" + grp.datatype_lenprec + \" \" + grp.is_null + \",\\n\")\n",
    "\n",
    "new_attribs = (isc\n",
    "               .groupby('TABLE_NAME')\n",
    "               .apply(create_attrib)\n",
    "               .to_frame(name='attribs')\n",
    "              )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_df = pd.merge(ist, new_attribs, on='TABLE_NAME')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_table(tbl_grp):\n",
    "    return \"\\nCREATE TABLE \" + tbl_grp.TABLE_NAME + \" (\\n\\t\" + \\\n",
    "                tbl_grp.attribs + \\\n",
    "            \"\\tPRIMARY KEY ( Id )\\n); \\\n",
    "            \\n\"\n",
    "df_script = (new_df\n",
    "             .groupby('TABLE_NAME')\n",
    "             .apply(create_table)\n",
    "             .to_frame(name='create_table_script')\n",
    "             .reset_index().create_table_script\n",
    "            )\n",
    "df_script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write script into a csv\n",
    "## it will write each table as an individual string\n",
    "df_script.to_csv(\"new_script.csv\", index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select all columns with ID in the name\n",
    "isid = isc.loc[isc.COLUMN_NAME.str.contains('Id'), ['TABLE_NAME', 'COLUMN_NAME']]\n",
    "# drop all the primary key column names\n",
    "isid = isid.loc[isid.COLUMN_NAME != 'Id', :]\n",
    "# extrapolate the foreign table's name from the Id Columns Prefixed name\n",
    "isid['id_prefix'] = isid.COLUMN_NAME.str.extract(r'(?P<foreign>^.*)Id')\n",
    "isid['projected_foreign_table'] = isid.id_prefix + 's'\n",
    "# create column containing boolean of check to see if projected table is in list of tables\n",
    "isid.loc[isid.projected_foreign_table.isin(list(ist.TABLE_NAME)), 'is_table'] = True\n",
    "\n",
    "# display the collection that correspond correctly to the tables\n",
    "istid = isid.loc[isid.is_table == True, :]\n",
    "istid\n",
    "# isid.loc[isid.is_table != True]\n",
    "# isid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sample foreign key constraint format\n",
    "# ALTER TABLE PostTags \n",
    "# ADD CONSTRAINT Fk_PostTags_Posts FOREIGN KEY ( PostId ) \n",
    "# REFERENCES Posts( Id ) ;\n",
    "\n",
    "# create function to script out constraints from table rows\n",
    "def create_fk_constraints(row):\n",
    "    return \"\\n\\\n",
    "ALTER TABLE {tbl_name} \\n\\\n",
    "ADD CONSTRAINT Fk_{tbl_name}_{frgn_tbl} FOREIGN KEY ( {col_name} ) \\n\\\n",
    "REFERENCES {frgn_tbl}( Id ) ;\\n\" \\\n",
    "    .format(tbl_name = row.TABLE_NAME,\\\n",
    "            col_name = row.COLUMN_NAME,\\\n",
    "            frgn_tbl = row.projected_foreign_table\n",
    "           )\n",
    "            \n",
    "# # singleton test\n",
    "# create_fk_constraints(isid.iloc[0,:])\n",
    "\n",
    "# create_fk_script for the standard pk_Table.ID_fk_TableId pairs \n",
    "create_fk_script = (istid\n",
    "                    .apply(create_fk_constraints, axis = 1)\n",
    "                   )\n",
    "\n",
    "# append the fk constraints to the table create script\n",
    "with open('new_script.csv', 'a') as create_table_script:\n",
    "    (create_fk_script\n",
    "     .to_csv(create_table_script, header = False, index = False)\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create pk_fk constraints for the secondary fk's\n",
    "## select out just the non-standard fk_id's referencing the user tables\n",
    "isuid = (isid\n",
    "         .loc[isid.is_table != True, 'TABLE_NAME':'id_prefix']\n",
    "         .loc[isid.COLUMN_NAME.str.contains('User|Moderator'), :]    \n",
    "        )\n",
    "## add in the correct projected table for this Users subsets\n",
    "isuid['projected_foreign_table'] = 'Users'\n",
    "\n",
    "# create a script for the non-standard users foreign keys\n",
    "create_users_fk_script = isuid.apply(create_fk_constraints, axis = 1)\n",
    "\n",
    "# append the non-standard user_id fk constraints to the creation and standard FK_scripts\n",
    "with open('new_script.csv', 'a') as create_table_script:\n",
    "    create_users_fk_script.to_csv(create_table_script, header = False, index = False)\n",
    "\n",
    "isuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# select what hasn't been handled yet\n",
    "isnuid = isid.loc[(isid.is_table != True) & (isid.COLUMN_NAME.str.contains('User|Moderator') == False), :]\n",
    "isnuid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# anything with Posts in TableName needs to reference Posts Id\n",
    "## except link type Id (1 = relatedPost, 3 = duplicate)\n",
    "## except PostNoticeTypes Table_name\n",
    "\n",
    "ispid = (isnuid\n",
    "         .loc[(isnuid.TABLE_NAME.str.contains('Post')) | \\\n",
    "              (isnuid.COLUMN_NAME.str.contains('Post|DuplicateOfQuestionId')), :\n",
    "             ]\n",
    "         .loc[(isnuid.TABLE_NAME.str.contains('Type') == False) & \\\n",
    "              (isnuid.COLUMN_NAME.str.contains('Type') == False), :\n",
    "             ]\n",
    "        )\n",
    "## add in the correct projected table for this Posts subset\n",
    "ispid['projected_foreign_table'] = 'Posts'\n",
    "\n",
    "# create a script for the non-standard users foreign keys\n",
    "create_posts_fk_script = ispid.apply(create_fk_constraints, axis = 1)\n",
    "\n",
    "# append the non-standard post_id fk constraints to the creation and standard FK_scripts\n",
    "with open('new_script.csv', 'a') as create_table_script:\n",
    "    create_posts_fk_script.to_csv(create_table_script, header = False, index = False)\n",
    "ispid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gather together all the columns that were \n",
    "from itertools import chain\n",
    "constraints_already_added = list(chain(istid.index.values, isuid.index.values, ispid.index.values))\n",
    "# isid.loc[isid.isin(constraints_already_added), :]\n",
    "isid[~isid.index.isin(constraints_already_added)]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# handling the naming convention outliers\n",
    "\n",
    "## ReviewTaskResults.RejectionReasonId to reference ReviewRejectionReasons.Id\n",
    "create_reviewtaskresults_fk = \"\\n\\\n",
    "ALTER TABLE ReviewTaskResults \\n\\\n",
    "ADD CONSTRAINT Fk_ReviewTaskResults_ReviewRejectionReasons FOREIGN KEY ( RejectionReasonId ) \\n\\\n",
    "REFERENCES ReviewRejectionReasons( Id ) ;\\n\"\n",
    "\n",
    "## ReviewTasks.CompletedByReviewTaskId to reference ReviewTaskResults.Id\n",
    "create_reviewtasks_fk = \"\\n\\\n",
    "ALTER TABLE ReviewTasks \\n\\\n",
    "ADD CONSTRAINT Fk_ReviewTasks_CompletedByReviewTaskId FOREIGN KEY ( CompletedByReviewTaskId ) \\n\\\n",
    "REFERENCES ReviewTaskResults( Id ) ;\\n\"\n",
    "\n",
    "## TagSynonyms.SourceTagName to reference Tags.TagName\n",
    "## TagSynonyms.TargetTagName to reference Tags.TagName\n",
    "create_tagsynonyms_tagnames_fk = \"\\n\\\n",
    "ALTER TABLE TagSynonyms \\n\\\n",
    "ADD CONSTRAINT Fk_TagSynonymsSourceTagName_Tags FOREIGN KEY ( SourceTagName ) \\n\\\n",
    "REFERENCES Tags( TagName ) ;\\n\\\n",
    "ALTER TABLE TagSynonyms \\n\\\n",
    "ADD CONSTRAINT Fk_TagSynonymsTargetTagName_Tags FOREIGN KEY ( TargetTagName ) \\n\\\n",
    "REFERENCES Tags( TagName ) ;\\n\"\n",
    "\n",
    "\n",
    "# combine outlier strings into single series\n",
    "create_singleton_fks = pd.Series([create_reviewtaskresults_fk,\\\n",
    "                                  create_reviewtasks_fk,\\\n",
    "                                  create_tagsynonyms_tagnames_fk])\n",
    "\n",
    "# append the singletons into the script\n",
    "with open('new_script.csv', 'a') as create_table_script:\n",
    "    create_singleton_fks.to_csv(create_table_script, index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# rewrite the script with all of the string quotes and extra line endings removed.\n",
    "def organize_data(location='./new_script.csv'):\n",
    "    with open(location, \"r\", encoding=\"utf-8\") as f:\n",
    "        name_string = f.read()\n",
    "        name_string = name_string.replace('\"\\n\"','')\n",
    "        name_string = name_string.replace('\"', '')\n",
    "    return name_string\n",
    "\n",
    "\n",
    "\n",
    "text_file = open(\"info_schema_create_tables.sql\", \"w\")\n",
    "text_file.write(organize_data())\n",
    "text_file.close()\n",
    "# organize_data().write('quote_free_script.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
